Loading Necessary Libraries¶
In [2]:
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import randint
import pandas as pd
%matplotlib inline
Data Visualization¶
Types of Graphs covered:¶
- Line Graph
- Basics
- Subplots
- Bar Chart
- Histogram
- Pie Chart
- Box & Whiskers Plot
Line Graph (Basics)¶
In [5]:
# Sample points for the first line.
x = [1, 2, 3, 8, 9, 23]
y = [5, 13, 10, 11, 45, 27]

# Theme and default figure size are configured before anything is drawn.
plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10,4)

# Font settings reused by the title and both axis labels.
title_font = {'fontname': 'Times New Roman', 'color': 'white', 'fontsize' : '22'}
axis_font = {'color': 'white', 'fontsize' : '16'}

# Line 1
# Shorthand format string: '[color][marker][line]' -> green, triangle markers, dashed.
# The same styling can also be spelled out with keyword arguments
# (color=..., marker=..., linestyle=..., markersize=..., markeredgecolor=...).
plt.plot(x, y, 'g^--', label='A simple line', linewidth=2)

# Line 2
# Sample x^2 every 0.5 on [0, 8]; the first ten points are drawn solid and the
# rest dashed. Index 9 belongs to both slices so the two segments join up.
x2 = np.arange(0,8.5,0.5)
solid_part = x2[:10]
dashed_part = x2[9:]
plt.plot(solid_part, solid_part**2, 'r', label='X^2')
plt.plot(dashed_part, dashed_part**2, 'r--')

# Title and axis labels.
plt.title('Line Graph', loc='left', fontdict=title_font)
plt.xlabel('X-axis', fontdict=axis_font)
plt.ylabel('Y-axis', fontdict=axis_font)

# Ticks are the markers denoting data points on the axes; they can be set
# explicitly with plt.xticks([...]) / plt.yticks([...]) if the defaults do not fit.

# Legend: let matplotlib pick the least obstructive corner ('best' is location code 0).
plt.legend(loc='best')

# Save the figure to disk before showing it.
plt.savefig('mygraph.png', dpi=200)
plt.show()
Multiple Plots in the same Canvas : Subplots¶
Two Plots Together¶
In [141]:
# 20 evenly spaced sample points from 0 to 10 (both endpoints included).
x = np.linspace(start=0, stop=10, num=20)
x
Out[141]:
array([ 0. , 0.52631579, 1.05263158, 1.57894737, 2.10526316,
2.63157895, 3.15789474, 3.68421053, 4.21052632, 4.73684211,
5.26315789, 5.78947368, 6.31578947, 6.84210526, 7.36842105,
7.89473684, 8.42105263, 8.94736842, 9.47368421, 10. ])
In [142]:
# 20 random integers drawn from 1 (inclusive) to 50 (exclusive); unseeded, so
# the values differ on every run.
y = randint(low=1, high=50, size=20)
y
Out[142]:
array([45, 29, 17, 25, 41, 48, 9, 12, 12, 39, 3, 1, 12, 47, 49, 35, 6,
17, 1, 33])
In [6]:
# Fresh data for this figure: 20 x positions and 20 random integer heights.
x = np.linspace(0, 10, num=20)
y = randint(1, 50, size=20)

plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10,4)

# One entry per panel: (format string, panel title, title colour).
panels = [
    ('c:o', 'A Cyan Plot', 'cyan'),
    ('r--_', 'A Red Plot', 'red'),
]

# plt.subplot(rows, cols, index) selects the panel that the following
# pyplot calls draw into; indices start at 1.
for position, (fmt, heading, colour) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(x, y, fmt)
    plt.title(heading, fontdict={'fontname': 'Times New Roman', 'color': colour})

# Figure-level title above both panels.
plt.suptitle('Two Plots Together', fontdict={'fontname': 'Times New Roman', 'color': 'white'})
plt.show()
Three Plots Together¶
In [17]:
# Fresh data for this figure: 20 x positions and 20 random integer heights.
x = np.linspace(0,10,20)
y = randint(1, 50, 20)

plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (12,4)

# Plot 1: the raw values.
plt.subplot(1,3,1)
plt.plot(x, y, 'c:o')
plt.title('A Cyan Plot', fontdict = {'fontname': 'fantasy', 'color' : 'cyan'})

# Plot 2: y raised to its own power.
# y holds fixed-width integers, and y**y exceeds the 64-bit integer range as
# soon as y >= 16; NumPy then wraps around silently and the curve is garbage.
# Computing in floating point keeps the true (very large) values.
y_float = np.asarray(y, dtype=float)
plt.subplot(1,3,2)
plt.plot(x, y_float**y, 'r--_')
plt.title('A Red Plot', fontdict = {'fontname': 'fantasy', 'color' : 'red'})

# Plot 3: element-wise product of y and x.
plt.subplot(1,3,3)
plt.plot(x, y*x, 'g-^')
plt.title('A Green Plot', fontdict = {'fontname': 'fantasy', 'color' : 'green'})

# Figure-level title, then save to disk before showing.
plt.suptitle('Three Plots Together', fontdict = {'fontname': 'fantasy', 'color': 'white'})
plt.savefig('threeplots.png')
plt.show()
Different Methods of Creating Subplots¶
Object Oriented Method¶
In [199]:
# Create an empty figure and place the axes on it by hand.
fig = plt.figure(figsize=(10, 4))

# Each rectangle is [left, bottom, width, height] as fractions of the figure.
outer_rect = [0, 0, 1, 1]
inset_rect = [0.1, 0.1, 0.4, 0.3]

ax1 = fig.add_axes(outer_rect)   # fills the whole figure
ax2 = fig.add_axes(inset_rect)   # smaller axes drawn on top of ax1

# x and y are reused from the cells above.
ax1.plot(x, y)
ax2.plot(x, y*2)
plt.show()
Brute Force Method to Plot More Than One Graph¶
In [200]:
# One row, two columns: `ax` is an array holding one Axes per panel.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
left_panel, right_panel = ax

# Address each panel explicitly; x and y are reused from the cells above.
left_panel.plot(x, y, 'b')
right_panel.plot(x, y*x, 'r-.')
plt.show()
For Loop Method¶
In [201]:
# One row, two columns; every panel is filled from the lists below.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))

col = ['g', 'm']
# y holds fixed-width integers, and y**y exceeds the 64-bit integer range as
# soon as y >= 16, which makes NumPy wrap around silently. Raising a float
# copy to the power keeps the true values.
data = [y, np.asarray(y, dtype=float)**y]

# Pair each Axes with its data series and colour.
for axes, series, colour in zip(ax, data, col):
    axes.plot(x, series, colour)

# Adjust the spacing so the panels do not overlap.
fig.tight_layout()
Setting X and Y limits of subplots¶
In [162]:
# Three panels side by side; x and y are reused from the cells above.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
first, second, third = ax

# Passing two x/y pairs to a single plot() call draws two lines at once.
first.plot(x, y, x, y*x)

# Middle panel: clip the y-axis to the range 0..1000.
second.plot(x, y**2, 'r')
second.set_ylim(0, 1000)

# Right panel: same two lines as the first one, but restricted on both axes.
third.plot(x, y, x, y*x)
third.set_xlim(0, 5)
third.set_ylim(0, 300)

plt.show()
Plotting on a Logarithmic Y-Axis in a Subplot¶
In [180]:
# Two panels: a linear one on the left, a log-scaled one on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
linear_ax, log_ax = ax

linear_ax.plot(x, y, x, y**2)

# exp(x) is drawn in white; switching the y-axis to a logarithmic scale
# turns the exponential curve into a straight line.
log_ax.plot(x, np.exp(x), 'w')
log_ax.set_yscale('log')

plt.show()
Setting Xticks and Yticks and Changing their Labels¶
In [193]:
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(x, y)

# Tick positions on the x-axis and the text shown at each of them; the
# '$...$' entries are rendered as math symbols.
tick_positions = [1, 2, 5, 10]
tick_names = [r'a', r'B', r'$\alpha$', r'$\delta$']
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_names, fontsize=20)

# The y-axis only gets custom positions; its labels stay numeric.
ax.set_yticks([1, 15, 25, 50])
plt.show()
Setting Y-axis Label as Scientific Notation¶
In [196]:
from matplotlib import ticker
In [198]:
# Build the tick formatter first: scientific notation rendered with math text.
formatter = ticker.ScalarFormatter(useMathText=True)
formatter.set_scientific(True)
# Exponent thresholds for switching to scientific notation.
formatter.set_powerlimits((-1, -1))

# Draw the data and attach the formatter to the major ticks of the y-axis.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(x, y)
ax.set_title('Scientific Notation')
ax.yaxis.set_major_formatter(formatter)
plt.show()
Bar Chart (Basics)¶
In [8]:
# Category names and the height of each bar.
labels = ['A', 'B', 'C']
values = [6, 8, 3]

plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10,4)

# align='edge' anchors each bar's left edge (rather than its centre) at the tick.
bars = plt.bar(labels, values, align='edge', edgecolor='indigo', facecolor='pink')

# Give every bar its own fill pattern.
for bar, pattern in zip(bars, ('O', '*', '+')):
    bar.set_hatch(pattern)

# Title and axis labels.
axis_font = {'color': 'white','fontsize' : '16'}
plt.title('Bar Graph', loc='right', fontdict={'fontname': 'Times New Roman', 'color': 'white', 'fontsize': '30'})
plt.xlabel('X-axis', fontdict=axis_font)
plt.ylabel('Y-axis', fontdict=axis_font)

plt.show()
Real World Examples¶
Line Chart¶
- Data selected for analysis
In [6]:
# Load the gas price table (one row per year, one column per country) from a
# path relative to the working directory, then display it.
gas = pd.read_csv(filepath_or_buffer='gas_prices.csv')
gas
Out[6]:
| Year | Australia | Canada | France | Germany | Italy | Japan | Mexico | South Korea | UK | USA | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1990 | NaN | 1.87 | 3.63 | 2.65 | 4.59 | 3.16 | 1.00 | 2.05 | 2.82 | 1.16 |
| 1 | 1991 | 1.96 | 1.92 | 3.45 | 2.90 | 4.50 | 3.46 | 1.30 | 2.49 | 3.01 | 1.14 |
| 2 | 1992 | 1.89 | 1.73 | 3.56 | 3.27 | 4.53 | 3.58 | 1.50 | 2.65 | 3.06 | 1.13 |
| 3 | 1993 | 1.73 | 1.57 | 3.41 | 3.07 | 3.68 | 4.16 | 1.56 | 2.88 | 2.84 | 1.11 |
| 4 | 1994 | 1.84 | 1.45 | 3.59 | 3.52 | 3.70 | 4.36 | 1.48 | 2.87 | 2.99 | 1.11 |
| 5 | 1995 | 1.95 | 1.53 | 4.26 | 3.96 | 4.00 | 4.43 | 1.11 | 2.94 | 3.21 | 1.15 |
| 6 | 1996 | 2.12 | 1.61 | 4.41 | 3.94 | 4.39 | 3.64 | 1.25 | 3.18 | 3.34 | 1.23 |
| 7 | 1997 | 2.05 | 1.62 | 4.00 | 3.53 | 4.07 | 3.26 | 1.47 | 3.34 | 3.83 | 1.23 |
| 8 | 1998 | 1.63 | 1.38 | 3.87 | 3.34 | 3.84 | 2.82 | 1.49 | 3.04 | 4.06 | 1.06 |
| 9 | 1999 | 1.72 | 1.52 | 3.85 | 3.42 | 3.87 | 3.27 | 1.79 | 3.80 | 4.29 | 1.17 |
| 10 | 2000 | 1.94 | 1.86 | 3.80 | 3.45 | 3.77 | 3.65 | 2.01 | 4.18 | 4.58 | 1.51 |
| 11 | 2001 | 1.71 | 1.72 | 3.51 | 3.40 | 3.57 | 3.27 | 2.20 | 3.76 | 4.13 | 1.46 |
| 12 | 2002 | 1.76 | 1.69 | 3.62 | 3.67 | 3.74 | 3.15 | 2.24 | 3.84 | 4.16 | 1.36 |
| 13 | 2003 | 2.19 | 1.99 | 4.35 | 4.59 | 4.53 | 3.47 | 2.04 | 4.11 | 4.70 | 1.59 |
| 14 | 2004 | 2.72 | 2.37 | 4.99 | 5.24 | 5.29 | 3.93 | 2.03 | 4.51 | 5.56 | 1.88 |
| 15 | 2005 | 3.23 | 2.89 | 5.46 | 5.66 | 5.74 | 4.28 | 2.22 | 5.28 | 5.97 | 2.30 |
| 16 | 2006 | 3.54 | 3.26 | 5.88 | 6.03 | 6.10 | 4.47 | 2.31 | 5.92 | 6.36 | 2.59 |
| 17 | 2007 | 3.85 | 3.59 | 6.60 | 6.88 | 6.73 | 4.49 | 2.40 | 6.21 | 7.13 | 2.80 |
| 18 | 2008 | 4.45 | 4.08 | 7.51 | 7.75 | 7.63 | 5.74 | 2.45 | 5.83 | 7.42 | 3.27 |
- Line Chart showing changes in the prices of gas (USD/gallon)
In [7]:
# Apply the theme first: style settings are read when the figure and axes are
# created, and the first pyplot call below (plt.title) creates them.
plt.style.use('dark_background')

# Title and axis labels.
plt.title('Change in Gas Prices', loc = 'center', fontdict = {'fontname': 'century','color': 'white', 'fontsize': '25'})
plt.xlabel('Change over Years', fontdict = {'fontname': 'century', 'color': 'white','fontsize' : '17'})
plt.ylabel('Prices (USD/Gallon)', fontdict = {'fontname': 'century', 'color': 'white','fontsize' : '17'})

# One line per country column. Each line carries its column name as its label,
# so the legend can never drift out of sync with the column order.
for country in gas:
    if country != 'Year':
        plt.plot(gas.Year, gas[country], marker = '.', label = country)

# [::2] keeps every second year so the tick labels do not crowd each other.
plt.xticks(gas.Year[::2])

# bbox_to_anchor=(1,1) puts the legend outside the plotting area.
plt.legend(loc = 0, bbox_to_anchor=(1,1))

# bbox_inches='tight' prevents the saved image (including the outside legend)
# from getting cropped.
plt.savefig('gas_prices.png', dpi= 300, bbox_inches='tight')
plt.show()
- Line Chart showing changes in the prices of gas (USD/gallon) in USA, Italy, France and Germany only
In [8]:
# Apply the theme first: style settings are read when the figure and axes are
# created, and the first pyplot call below (plt.title) creates them.
plt.style.use('dark_background')

# Title and axis labels.
plt.title('Change in Gas Prices', loc = 'center', fontdict = {'fontname': 'century', 'color': 'white', 'fontsize': '25'})
plt.xlabel('Change over Years', fontdict = {'fontname': 'century', 'color': 'white','fontsize' : '17'})
plt.ylabel('Prices (USD/Gallon)', fontdict = {'fontname': 'century', 'color': 'white','fontsize' : '17'})

# The selected countries could also be plotted one call at a time, e.g.
#   plt.plot(gas.Year, gas.USA, 'b.--')
# Looping over the list scales better. Iterate over the selected names only,
# drawing exactly one line per country; looping over every column of `gas`
# and plotting the whole selection each time would stack duplicate lines.
specific_countries = ['USA', 'France', 'Germany', 'Italy']
for country in specific_countries:
    plt.plot(gas.Year, gas[country], marker = '.', label = country)

# Every second year as a tick, plus an extra (empty) 2010 tick at the right edge.
plt.xticks(gas.Year[::2].tolist()+[2010])

# Legend entries come from the line labels; anchor it outside the plot.
plt.legend(loc = 0, bbox_to_anchor=(1,1))

# NOTE(review): facecolor='white' gives the saved image a white figure
# background while the title/labels are white too; confirm this is intended.
plt.savefig('gas_prices_specific.png', dpi= 300, facecolor = 'white', bbox_inches='tight')
plt.show()
Histogram¶
- Data selected for Analysis
In [10]:
# Load the player table from a path relative to the working directory and
# preview its first five rows.
fifa = pd.read_csv(filepath_or_buffer='fifa_data.csv')
fifa.head(n=5)
Out[10]:
| Unnamed: 0 | ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | ... | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | Release Clause | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 158023 | L. Messi | 31 | https://cdn.sofifa.org/players/4/19/158023.png | Argentina | https://cdn.sofifa.org/flags/52.png | 94 | 94 | FC Barcelona | ... | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 | €226.5M |
| 1 | 1 | 20801 | Cristiano Ronaldo | 33 | https://cdn.sofifa.org/players/4/19/20801.png | Portugal | https://cdn.sofifa.org/flags/38.png | 94 | 94 | Juventus | ... | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 | €127.1M |
| 2 | 2 | 190871 | Neymar Jr | 26 | https://cdn.sofifa.org/players/4/19/190871.png | Brazil | https://cdn.sofifa.org/flags/54.png | 92 | 93 | Paris Saint-Germain | ... | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 | €228.1M |
| 3 | 3 | 193080 | De Gea | 27 | https://cdn.sofifa.org/players/4/19/193080.png | Spain | https://cdn.sofifa.org/flags/45.png | 91 | 93 | Manchester United | ... | 68.0 | 15.0 | 21.0 | 13.0 | 90.0 | 85.0 | 87.0 | 88.0 | 94.0 | €138.6M |
| 4 | 4 | 192985 | K. De Bruyne | 27 | https://cdn.sofifa.org/players/4/19/192985.png | Belgium | https://cdn.sofifa.org/flags/7.png | 91 | 92 | Manchester City | ... | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 | €196.4M |
5 rows × 89 columns
- Distribution of Players Overall Skills in FIFA 2018
In [11]:
# Theme and default figure size must be in place before the first pyplot call
# creates the figure; setting them afterwards has no effect on this figure.
plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10,4)

# Bin edges: three 10-point-wide bins covering overall ratings 70..100.
# Ratings outside these edges are not counted.
bins = [70,80,90,100]

# Title and axis labels.
plt.title('Distribution of Players Overall Skills', loc = 'center', fontdict = {'fontname': 'century', 'color': 'white', 'fontsize': '22'})
plt.xlabel('Overall Skill Points', fontdict = {'fontname': 'century', 'color': 'white','fontsize' : '15'})
plt.ylabel('Number of Players', fontdict = {'fontname': 'century', 'color': 'white','fontsize' : '15'})

plt.hist(fifa.Overall, bins = bins, color = 'cyan')
plt.show()
Pie Chart¶
- Preferred Foot by FIFA Players
In [13]:
# Inspect the raw column that the pie chart below is built from.
fifa.loc[:, 'Preferred Foot']
Out[13]:
0 Left
1 Right
2 Right
3 Right
4 Right
...
18202 Right
18203 Right
18204 Right
18205 Right
18206 Right
Name: Preferred Foot, Length: 18207, dtype: object
In [12]:
# Theme and default figure size are set before the figure is created so that
# they actually apply to this chart.
plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10,4)

# Count players per foot by summing boolean masks; this avoids positional
# indexing into the labelled result of DataFrame.count().
preferred_foot = fifa['Preferred Foot']
left = int((preferred_foot == 'Left').sum())
right = int((preferred_foot == 'Right').sum())

# Wedge sizes and the text shown next to each wedge. The labels name the foot
# (with the player count in brackets) so each wedge can be identified.
sizes = [right, left]
labels = [f'Right ({right})', f'Left ({left})']

# autopct prints each wedge's share as a percentage with two decimals.
plt.pie(sizes, labeldistance = 1.3, labels = labels, colors = ['crimson', 'lightcoral'], autopct= '%.2f %%')
plt.title('Preferred Foot by FIFA Players', loc = 'center', fontdict = {'fontname': 'century', 'color': 'white', 'fontsize': '20'})
plt.show()
- Weight Distribution of FIFA Players
In [275]:
# Inspect the raw weight column before converting it.
fifa['Weight']
Out[275]:
0 159lbs
1 183lbs
2 150lbs
3 168lbs
4 154lbs
...
18202 134lbs
18203 170lbs
18204 148lbs
18205 154lbs
18206 176lbs
Name: Weight, Length: 18207, dtype: object
In [14]:
def _pounds(value):
    """Turn a weight string such as '159lbs' into an int; pass anything else through."""
    if type(value) == str:
        # strip() removes the characters 'l', 'b' and 's' from both ends.
        return int(value.strip('lbs'))
    # Non-string entries (e.g. missing values or already-converted numbers) are kept.
    return value


# Replace the column with its converted values, then display it.
fifa.Weight = [_pounds(entry) for entry in fifa.Weight]
fifa.Weight
Out[14]:
0 159.0
1 183.0
2 150.0
3 168.0
4 154.0
...
18202 134.0
18203 170.0
18204 148.0
18205 154.0
18206 176.0
Name: Weight, Length: 18207, dtype: float64
In [15]:
# Newer Matplotlib releases ship the seaborn styles under a 'seaborn-v0_8-'
# prefix and no longer accept the old name; use whichever one is installed.
ticks_style = 'seaborn-v0_8-ticks' if 'seaborn-v0_8-ticks' in plt.style.available else 'seaborn-ticks'
plt.style.use(ticks_style)
# The default figure size must be set before the figure is created.
plt.rcParams['figure.figsize'] = (10,4)

# Count players per weight class (lbs) by summing boolean masks; this avoids
# positional indexing into the labelled result of DataFrame.count().
# Expects fifa.Weight to be numeric already (converted in the cell above).
weight = fifa.Weight
light = int((weight < 125).sum())
light_medium = int(((weight >= 125) & (weight < 150)).sum())
medium = int(((weight >= 150) & (weight < 175)).sum())
medium_heavy = int(((weight >= 175) & (weight < 200)).sum())
heavy = int((weight >= 200).sum())

weights = [light,light_medium, medium, medium_heavy, heavy]
label = ['under 125', '125-150', '150-175', '175-200', 'over 200']
# Pull the small outer classes away from the centre so their labels stay readable.
explode = (.3,.2,0,0,.3)

plt.title('Weight of Professional Soccer Players (lbs)', loc = 'center', fontdict = {'fontname': 'century', 'color': 'navy', 'fontsize': '20'})
plt.pie(weights, labeldistance = 1.3, wedgeprops = {'linewidth': 5}, labels=label, explode=explode, pctdistance=0.9, autopct='%.2f %%')
plt.show()
Box and Whiskers Chart¶
- Comparing FIFA teams with one another
In [18]:
plt.style.use('dark_background')
plt.figure(figsize=(6, 8))


def _club_overall(club_name):
    """Return the 'Overall' ratings of every player whose Club equals club_name."""
    return fifa.loc[fifa.Club == club_name, 'Overall']


# One rating series per club being compared.
barcelona = _club_overall('FC Barcelona')
madrid = _club_overall('Real Madrid')
juventus = _club_overall('Juventus')
manchester = _club_overall('Manchester United')

club_names = ['FC Barcelona', 'Real Madrid', 'Juventus', 'Manchester United']
club_ratings = [barcelona, madrid, juventus, manchester]

plt.title('FIFA Football Teams Comparison', loc='center', fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '20'})
# One box per club, in the same order as the names.
plt.boxplot(club_ratings, labels=club_names)
plt.show()